For the DESeq2 analysis, the raw, prefiltered counts will be used.
se.gastroc <- readRDS("./data/Robjects/01_se.gastroc.rds")
se.soleus <- readRDS("./data/Robjects/01_se.soleus.rds")
counts.gastroc <- se.gastroc[rowData(se.gastroc)$filtered,] %>% assay()
counts.soleus <- se.soleus[rowData(se.soleus)$filtered,] %>% assay()
metadata <- colData(se.gastroc)
createDDSObject <- function(counts, metadata) {
# select sample columns
reorder_index <- match(rownames(metadata), colnames(counts))
counts <- counts[,reorder_index]
# Check metadata consistency
all(rownames(metadata) %in% colnames(counts)) %>%
assertthat::assert_that(., msg = "metadata and count table do not match")
## DESeq2 object
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = metadata,
design = ~ genotype)
return( DESeq(dds) )
}
# creating DESeq Objects
dds.gastroc <- createDDSObject(counts.gastroc, metadata)
dds.soleus <- createDDSObject(counts.soleus, metadata)
# creating SESeqg Objects from SE
# dds.gastroc.se <- DESeqDataSet(se.gastroc, design = ~ genotype)
# dds.soleus.se <- DESeqDataSet(se.soleus, design = ~ genotype)
res.gastroc <- results(dds.gastroc, alpha = pCutoff, contrast = c("genotype", "KO", "WT"))
res.soleus <- results(dds.soleus, alpha = pCutoff, contrast = c("genotype", "KO", "WT"))
plotMA(res.gastroc)
plotMA(res.soleus)
plotDispEsts(dds.gastroc)
plotDispEsts(dds.soleus)
topGenes.gastroc <- as.data.frame(res.gastroc) %>%
tibble::rownames_to_column("GeneID") %>%
top_n(100, wt = -padj) %>%
arrange(padj)
topGenes.soleus <- as.data.frame(res.soleus) %>%
tibble::rownames_to_column("GeneID") %>%
top_n(100, wt = -padj) %>%
arrange(padj)
knitr::kable(head(topGenes.gastroc), caption = "gastroc")
| GeneID | baseMean | log2FoldChange | lfcSE | stat | pvalue | padj |
|---|---|---|---|---|---|---|
| ENSMUSG00000050335 | 4934.2949 | 6.267396 | 0.1664065 | 37.66316 | 0 | 0 |
| ENSMUSG00000081249 | 1378.5460 | 4.208222 | 0.1115955 | 37.70959 | 0 | 0 |
| ENSMUSG00000010751 | 1864.4937 | 5.610357 | 0.2014267 | 27.85309 | 0 | 0 |
| ENSMUSG00000032712 | 5136.5059 | 4.686418 | 0.1683204 | 27.84225 | 0 | 0 |
| ENSMUSG00000034855 | 554.3475 | 7.276611 | 0.2735037 | 26.60517 | 0 | 0 |
| ENSMUSG00000037613 | 960.7558 | 3.183338 | 0.1266510 | 25.13472 | 0 | 0 |
knitr::kable(head(topGenes.soleus), caption = "soleus")
| GeneID | baseMean | log2FoldChange | lfcSE | stat | pvalue | padj |
|---|---|---|---|---|---|---|
| ENSMUSG00000038615 | 32047.185 | -2.5454285 | 0.0636829 | -39.97038 | 0 | 0 |
| ENSMUSG00000081249 | 1651.592 | 4.5759032 | 0.1292779 | 35.39587 | 0 | 0 |
| ENSMUSG00000039067 | 4355.557 | -1.1049116 | 0.0419786 | -26.32084 | 0 | 0 |
| ENSMUSG00000028932 | 3190.657 | -1.1102258 | 0.0558536 | -19.87741 | 0 | 0 |
| ENSMUSG00000006998 | 7402.617 | -0.9196943 | 0.0468631 | -19.62511 | 0 | 0 |
| ENSMUSG00000102869 | 6985.622 | -0.9181929 | 0.0479924 | -19.13203 | 0 | 0 |
hist(res.gastroc$pvalue, main = "gastroc")
hist(res.soleus$pvalue, main = "soleus")
volcanoPlot <- function(result, se, pCutoff = 0.05, FCutoff = 1, tissue = character()) {
gene_names <-
rowData(se)[rownames(result), c("gene_name"), drop = F]
results.df <- result %>%
as.data.frame() %>%
dplyr::arrange(padj)
# top 10 gene labels for respectively up and down regulation
labs.up <- results.df[results.df$log2FoldChange > FCutoff, ] %>%
rownames() %>% .[1:10] %>% gene_names[., c("gene_name")]
labs.down <- results.df[results.df$log2FoldChange < -FCutoff, ] %>%
rownames() %>% .[1:10] %>% gene_names[., c("gene_name")]
selectLab <- c(labs.up, labs.down, "Nfe2l1") %>% unique() # always including "Nfe2l1"
# custom colors:
keyvals <- ifelse(
result$log2FoldChange < -FCutoff &
result$padj < pCutoff,
'royalblue',
ifelse(
result$log2FoldChange > FCutoff &
result$padj < pCutoff,
'red',
'gray'
)
)
keyvals[is.na(keyvals)] <- 'gray'
names(keyvals)[keyvals == 'red'] <- 'up regulated'
names(keyvals)[keyvals == 'gray'] <- 'nonsignificant'
names(keyvals)[keyvals == 'royalblue'] <- 'down regulated'
vlc.plt <- EnhancedVolcano(
result,
x = 'log2FoldChange',
y = 'padj',
title = 'WT vs KO: Nfe2l1 knockout',
subtitle = ifelse(isEmpty(tissue), "", paste0('tissue: ', tissue)),
caption = "",
ylab = expression(paste(-Log[10], p[adj])),
pCutoff = pCutoff,
FCcutoff = FCutoff,
legendPosition = 'right',
pointSize = 2,
colCustom = keyvals,
lab = gene_names$gene_name,
selectLab = selectLab,
labSize = 3,
boxedLabels = TRUE,
drawConnectors = TRUE,
max.overlaps = Inf
)
return(vlc.plt)
}
volcanoPlot(res.gastroc, se.gastroc, pCutoff = pCutoff, FCutoff = FCutoff, tissue = "gastrocnemius")
volcanoPlot(res.soleus, se.soleus, pCutoff = pCutoff, FCutoff = FCutoff, tissue = "soleus")
setBold <- function(src, special_labs) {
# source: https://stackoverflow.com/questions/39694490/highlighting-individual-axis-labels-in-bold-using-ggplot2
if (!is.factor(src)) src <- factor(src)
src_levels <- base::levels(src)
brave <- special_labs %in% src_levels
b_vec <- rep("plain", length(src_levels))
if (all(brave)) {
b_pos <- purrr::map_int(special_labs, ~which(.==src_levels))
b_vec[b_pos] <- "bold"
b_vec
} else {
message("setBold: no matching element found")
}
return(b_vec)
}
getTopExpressedEnsemblNames <-
function(result,
FCutoff,
n,
up = T) {
.filter <- ifelse(up, `>`, `<`)
subset(result, .filter(log2FoldChange, FCutoff)) %>%
data.frame() %>%
filter(baseMean > 100) %>%
arrange(padj) %>%
.[1:n, ] %>%
rownames()
}
boxplot.top <- function(result, dds, se, upregulated=TRUE, n = 20, FCutoff = 1, tissue ="") {
# getting the top n regulated genes
names.top <- getTopExpressedEnsemblNames(result, FCutoff=FCutoff, n=n, up = upregulated)
counts.top <- counts(dds, normalized=T)[names.top, ]
metadata <- colData(se) %>% as.data.frame()
gene_names <- rowData(se)[, c("gene_name"), drop = F] %>% as.data.frame()
counts.plt <-
data.frame(counts.top) %>%
tibble::rownames_to_column(var = "ensembl") %>%
tidyr::gather(key = "samplename",
value = "normalized_counts", 2:13) %>%
merge(metadata, by.x="samplename", by.y=0) %>%
merge(gene_names, by.x="ensembl", by.y=0) %>%
mutate(genotype = factor(genotype)) %>%
mutate(genotype = relevel(genotype, "WT")) %>% # "WT" needs to be displayed before "KO"
mutate(gene_name = forcats::fct_reorder(gene_name, normalized_counts, .desc = T))
direction <- ifelse(upregulated, "up", "down")
ggplot(counts.plt,
aes(
x = as.factor(gene_name),
y = normalized_counts,
fill = genotype
)) +
geom_dotplot(
binaxis = 'y',
stackdir = 'center',
dotsize = 0.3,
position = position_dodge(0.8),
fill = "black"
) +
geom_boxplot(outlier.size = 0.3) +
scale_y_log10() +
xlab("Genes") +
ylab("Normalized Counts") +
scale_fill_manual(values = customColors) +
ggtitle(paste0("Top ", n, " ", direction, "-regulated Genes\n tissue: ", tissue)) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(
angle = 45,
hjust = 1,
face = setBold(counts.plt$gene_name, c("Nfe2l1"))
))
}
boxplot.top(res.gastroc, dds.gastroc, se.gastroc, upregulated = T, tissue = "gastroc")
boxplot.top(res.gastroc, dds.gastroc, se.gastroc, upregulated = F, tissue = "gastroc")
boxplot.top(res.soleus, dds.soleus, se.soleus, upregulated = T, tissue = "soleus")
boxplot.top(res.soleus, dds.soleus, se.soleus, upregulated = F, tissue = "soleus")
using the Wald-test stat from the DESeq2 result and
filtering on the set FCutoff=1 and
pCutoff=0.01 yields the following plot:
gastroc_res.filtered <- apply_cutoffs(res.gastroc, colname="gastroc", FCutoff, pCutoff)
Error in apply_cutoffs(res.gastroc, colname = "gastroc", FCutoff, pCutoff) :
object 'res.filtered' not found
ggplot(res.combined, aes(x = diff.exp)) +
geom_bar(aes(fill = diff.exp))
Here are some Venn or Euler (proportional Venn) diagrams to choose from.
Note: using Eulerr diagrams, yields inconsistent plots where the circle sets will be placed at different absolute positions each time the plot function is called. => Thus the plot might need to be called multiple times until the wanted constellation is obtained.
significant overview with all genes:
venn.colors <- c(genes = "white", gastroc = "#FFB08E", soleus = "#FF6638")
gene_sets <- c(
"all genes" = sign_gene_stats$`all genes` - sign_gene_stats$shared_sig_genes,
"all genes&gastroc" = sign_gene_stats$gastroc - sign_gene_stats$shared_sig_genes,
"all genes&soleus" = sign_gene_stats$soleus - sign_gene_stats$shared_sig_genes,
"all genes&gastroc&soleus" = sign_gene_stats$shared_sig_genes
)
# 1st option: euler
plot(euler(gene_sets),
legend = list(side = "right") ,
main = "significant genes per tissue",
fills = venn.colors)
# 2nd option: venn
plot(venn(gene_sets),
# legend = list(side = "right") ,
main = "significant genes per tissue",
fills = venn.colors)
significant grouped:
col_palette <- RColorBrewer::brewer.pal(8, "Dark2")
# venn.colors <- c(gastroc = "#FFB08E", soleus = "#FF6638", col_palette[1:4])
venn.colors <- c(gastroc = "#FFB08E", soleus = "#FF6638", hue_pal()(4))
# sets for the venn/euler diagram (exclusive counts)
gene_sets <- c(
"gastroc" = sign_gene_stats$gastroc - sign_gene_stats$shared_sig_genes,
"soleus" = sign_gene_stats$soleus - sign_gene_stats$shared_sig_genes,
"gastroc&soleus" = sign_gene_stats$shared_sig_genes,
"gastroc&soleus&both down" = sign_gene_stats$`both down`,
"gastroc&soleus&both up" = sign_gene_stats$`both up`,
"gastroc&soleus&ga up, sol down" = sign_gene_stats$`ga up, sol down`,
"gastroc&soleus&ga down, sol up" = sign_gene_stats$`ga down, sol up`
)
# 'gastroc&soleus': if this line is omitted, then all intersects will be denoted
# with 0 (which was the goal to omit)
# 1st option: eulerr
plot(
euler(gene_sets),
quantities = T,
legend = list(side = "right"),
fills = venn.colors,
main = "significant genes"
)
# only the two tissues
gene_sets <- c(
"gastroc" = sign_gene_stats$gastroc - sign_gene_stats$shared_sig_genes,
"soleus" = sign_gene_stats$soleus - sign_gene_stats$shared_sig_genes,
"gastroc&soleus" = sign_gene_stats$shared_sig_genes
)
# 2nd option: euler two tissues
plot(
euler(gene_sets),
# legend = list(side = "right"),
fills = venn.colors,
main = "significant genes"
)
# 3rd option: venn two tissues
plot(
venn(gene_sets),
fills = venn.colors,
main = "significant genes"
)
save(dds.gastroc, dds.soleus, res.gastroc, res.soleus, file = "./data/Robjects/03_DDS.RData")